
# Load (install) packages this replication file depends on. 
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(estimatr)
library(randomizr)
library(ri2)
library(knitr)
library(kableExtra)
library(ggbeeswarm)
library(lemon)
library(cowplot)
library(dbarts)

# Theme for pretty plots (optional)
devtools::source_url("https://kylepeyton.github.io/assets/theme_kyle.R")


# Helper functions (necessary)
add_parens <- function(x, digits = 2) {
  x <- as.numeric(x)
  return(paste0("(", sprintf(paste0("%.", digits, "f"), x), ")"))
}

add_cis <- function(est, se, level = 1.96, digits = 2) {
  lb <- as.numeric(est - (level*se))
  ub <- as.numeric(est + (level*se))
  return(paste0("[", sprintf(paste0("%.", digits, "f"), lb), ",", 
                sprintf(paste0("%.", digits, "f"), ub), "]"))
}

format_num <- function(x, digits = 2) {
  x <- as.numeric(x)
  return(paste0(sprintf(paste0("%.", digits, "f"), x)))
}

make_entry <- function(est, se, p, digits = 2, level = 1.96, ci = FALSE) {
  if(ci){
    entry <- paste0(format_num(est, digits = digits), " ", 
                    add_cis(est, se, level, digits = digits)) 
  } else{
    entry <- paste0(format_num(est, digits = digits), " ", 
                    add_parens(se, digits = digits))
  }
  entry[p < 0.05] <- paste0(entry[p < 0.05], "*")
  return(entry)
}

table_entry <- function(est, se, digits = 2, level = 1.96, ci = FALSE) {
  if(ci){
    entry <- paste0(format_num(est, digits = digits), " ", 
                    add_cis(est, se, level, digits = digits)) 
  } else{
    entry <- paste0(format_num(est, digits = digits), " ", 
                    add_parens(se, digits = digits))
  }
  return(entry)
}

# Glass's Delta
glass_delta <- function(Y, Z = Z, name = "WM"){
  Y/sd(Y[Z == name], na.rm = TRUE)
}

## Read in datasets
mturk_df <- 
  read_csv("ptbias_mturk.csv") %>% 
  mutate(Z = factor(Z, levels = c("WM", "WF", "BF", "BM")))

lucid_df <- 
  read_csv("ptbias_lucid.csv") %>% 
  mutate(Z = factor(Z, levels = c("WM", "WF", "BF", "BM")))

####------------------Print reliability coefficients for DVs--------------------

# warmth/competence: 
mturk_df %>% 
  filter(R == 1) %>% 
  dplyr::select(y_scale_tolerant, y_scale_warm, y_scale_sincere,
                y_scale_gnature) %>% 
  psych::alpha()

mturk_df %>% 
  filter(R == 1) %>% 
  dplyr::select(y_scale_intell, y_scale_comp, y_scale_conf) %>% 
  psych::alpha()


lucid_df %>% 
  filter(R == 1) %>% 
  dplyr::select(y_scale_kind, y_scale_open, y_scale_trustworthy) %>% 
  psych::alpha()

lucid_df %>% 
  filter(R == 1) %>% 
  dplyr::select(y_scale_intelligent, y_scale_competent, y_scale_qualified) %>% 
  psych::alpha()

# Primary DVs: 
primary_dvs <- c("y_confdoc", "y_whichd", "y_askmor", "y_satis", "y_rcmnd")

mturk_df %>% 
  filter(R == 1) %>% 
  dplyr::select(primary_dvs) %>% 
  psych::alpha()

lucid_df %>% 
  filter(R == 1) %>% 
  dplyr::select(primary_dvs) %>% 
  psych::alpha()

## Stacked dataset of outcomes and covariates: 
covs <- c("dem_trustdoc", "dem_race4", "dem_age", "dem_female", "dem_college",
          "health_pastvisit", "health_mental", "health_overall","insure_cover",
          "insure_unpaid")

est_df <- 
  bind_rows(mturk_df[, c("Z", "S", "R", "primary_dvs_scale", primary_dvs, 
                         "y_warm_index", "y_comp_index", "S", covs)],
            lucid_df[, c("Z", "S", "R", "primary_dvs_scale", primary_dvs, 
                         "y_warm_index", "y_comp_index", "S", covs)]) %>%
  group_by(S) %>% 
  mutate(primary_dvs_index = scales::rescale(primary_dvs_scale, 
                                             to = c(0, 100)))

#####-------------------------eFig 13------------------------------------------
mturk_df %>%
  select(starts_with("overt_bw")) %>% 
  na.omit() %>% 
  gather() %>% 
  mutate(group = stringr::str_split_fixed(key, "_", n = 3)[, 3],
         group = factor(group, levels = c("lazy", "unint", "untrst", "viol"),
                        labels = c("White-Black Work Ethic", 
                                   "White-Black Intelligence",
                                   "White-Black Trustworthiness",
                                   "White-Black Peacefulness"))) %>% 
  ggplot(., aes(value)) + 
  geom_histogram(bins = 13, aes(y = stat(width*density))) + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  facet_wrap(~ group) + 
  theme_kyle() + 
  ylab("") + xlab("")

#####-------------------------eFig 14------------------------------------------
lucid_df %>%
  select(starts_with("overt_bw")) %>% 
  na.omit() %>% 
  gather() %>% 
  mutate(group = stringr::str_split_fixed(key, "_", n = 3)[, 3],
         group = factor(group, levels = c("lazy", "unint", "untrst", "viol"),
                        labels = c("White-Black Work Ethic", 
                                   "White-Black Intelligence",
                                   "White-Black Trustworthiness",
                                   "White-Black Peacefulness"))) %>% 
  ggplot(., aes(value)) + 
  geom_histogram(bins = 13, aes(y = stat(width*density))) + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  facet_wrap(~ group) + 
  theme_kyle() + 
  ylab("") + xlab("")

#####-------------------------eFig 15------------------------------------------
mturk_df %>% 
  select(sexism1, sexism2, sexism3, sexism4) %>% 
  na.omit() %>% 
  mutate_all(as.factor) %>% 
  gather() %>% 
  group_by(key, value) %>% 
  summarise(n = n()) %>% 
  mutate(pct = n/sum(n)) %>% 
  group_by(key) %>% 
  ggplot(., aes(y = pct, x = value)) + 
  geom_bar(stat = "identity") + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  facet_wrap(~ key) + 
  theme_kyle() + 
  ylab("") + xlab("")

#####-------------------------eFig 16------------------------------------------
lucid_df %>% 
  select(asi_hostile1, asi_hostile2) %>% 
  na.omit() %>% 
  mutate_all(as.factor) %>% 
  gather() %>% 
  group_by(key, value) %>% 
  summarise(n = n()) %>% 
  mutate(pct = n/sum(n)) %>% 
  group_by(key) %>% 
  ggplot(., aes(y = pct, x = value)) + 
  geom_bar(stat = "identity") + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  facet_wrap(~ key) + 
  theme_kyle() + 
  ylab("") + xlab("")

#####-------------------------eFig 17------------------------------------------
lucid_df %>% 
  select(asi_benevolent1, asi_benevolent2, asi_benevolent3) %>% 
  na.omit() %>% 
  mutate_all(as.factor) %>% 
  gather() %>% 
  group_by(key, value) %>% 
  summarise(n = n()) %>% 
  mutate(pct = n/sum(n)) %>% 
  group_by(key) %>% 
  ggplot(., aes(y = pct, x = value)) + 
  geom_bar(stat = "identity") + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  facet_wrap(~ key) + 
  theme_kyle() + 
  ylab("") + xlab("")

## Replicate eTables
dem_dvs <- c("dem_age", "dem_female", "dem_white", "dem_black", "dem_hispanic",
             "dem_other", "dem_college", "dem_lowinc", "insure_cover",
             "insure_unpaid", "health_mental", "health_overall", 
             "health_recentvisit")

#####-------------------------eTable 1------------------------------------------
# Flag medicaid and medicare categories:
medicaid <- c(3, 13, 23, 34, 35, 345, 12345)
medicare <- c(4, 14, 24, 34, 345, 12345)

mturk_desc <- 
  mturk_df %>%
  filter(R == 1) %>%
  mutate(dem_black = as.numeric(dem_race4 == "Black"),
         dem_white = as.numeric(dem_race4 == "White"),
         dem_hispanic = as.numeric(dem_race4 == "Hispanic"),
         dem_other = as.numeric(dem_race4 == "Other"),
         # HH income below US median of 60k (2017 est)
         dem_lowinc = as.numeric(dem_income < 13),
         dem_recentvisit = as.numeric(health_pastvisit > 0),
         dem_medicaid = as.numeric(insure_type %in% medicaid),
         dem_medicare = as.numeric(insure_type %in% medicare),
         dem_uninsure = as.numeric(insure_cover == 0),
         Study = "MTurk") %>%
  dplyr::select(dem_age, dem_female, dem_white, dem_black, dem_hispanic, 
                dem_other, dem_college, dem_lowinc, dem_uninsure, dem_medicaid,
                dem_medicare, insure_unpaid, health_mental, health_overall,
                dem_trustdoc, dem_recentvisit, Study, Z) 

lucid_desc <- 
  lucid_df %>%
  filter(R == 1) %>%
  mutate(dem_black = as.numeric(dem_race4 == "Black"),
         dem_white = as.numeric(dem_race4 == "White"),
         dem_hispanic = as.numeric(dem_race4 == "Hispanic"),
         dem_other = as.numeric(dem_race4 == "Other"),
         # HH income below US median of 60k (2017 est)
         dem_lowinc = as.numeric(dem_income < 13),
         dem_recentvisit = as.numeric(health_pastvisit > 0),
         dem_medicaid = as.numeric(insure_type %in% medicaid),
         dem_medicare = as.numeric(insure_type %in% medicare),
         dem_uninsure = as.numeric(insure_cover == 0),
         dem_trustdoc = round(scales::rescale(dem_trustdoc, to = c(1, 5))),
         Study = "Lucid") %>%
  dplyr::select(dem_age, dem_female, dem_white, dem_black, dem_hispanic, 
                dem_other, dem_college, dem_lowinc, dem_uninsure, dem_medicaid,
                dem_medicare, insure_unpaid, health_mental, health_overall,
                dem_trustdoc, dem_recentvisit, Study, Z) 

desc_table <- 
  mturk_desc %>% 
  group_by(Z) %>%
  mutate(desc_age = paste0(median(dem_age, na.rm = TRUE), " (", 
                           min(dem_age, na.rm = T), ",", 
                           max(dem_age, na.rm = T), ")"),
         desc_female = paste0(sum(dem_female, na.rm = TRUE), " (", 
                              round(mean(dem_female, na.rm = TRUE)*100, 2), "%)"),
         desc_college = paste0(sum(dem_college, na.rm = TRUE), " (", 
                               round(mean(dem_college, na.rm = TRUE)*100, 2), "%)"),
         desc_income = paste0(sum(dem_lowinc, na.rm = TRUE), " (", 
                              round(mean(dem_lowinc, na.rm = TRUE)*100, 2), "%)"),
         desc_white = paste0(sum(dem_white, na.rm = TRUE), " (", 
                             round(mean(dem_white, na.rm = TRUE)*100, 2), "%)"),
         desc_black = paste0(sum(dem_black, na.rm = TRUE), " (", 
                             round(mean(dem_black, na.rm = TRUE)*100, 2), "%)"),
         desc_hispanic = paste0(sum(dem_hispanic, na.rm = TRUE), " (", 
                                round(mean(dem_hispanic, na.rm = TRUE)*100, 2), "%)"),
         desc_other = paste0(sum(dem_other, na.rm = TRUE), " (", 
                             round(mean(dem_other, na.rm = TRUE)*100, 2), "%)"),
         desc_medicare = paste0(sum(dem_medicare, na.rm = TRUE), " (", 
                                round(mean(dem_medicare, na.rm = TRUE)*100, 2), "%)"),
         desc_medicaid = paste0(sum(dem_medicaid, na.rm = TRUE), " (", 
                                round(mean(dem_medicaid, na.rm = TRUE)*100, 2), "%)"),
         desc_uninsured = paste0(sum(dem_uninsure, na.rm = TRUE), " (", 
                                 round(mean(dem_uninsure, na.rm = TRUE)*100, 2), "%)"),
         desc_unpaid = paste0(sum(insure_unpaid, na.rm = TRUE), " (", 
                              round(mean(insure_unpaid, na.rm = TRUE)*100, 2), "%)"),
         desc_visit = paste0(sum(dem_recentvisit, na.rm = TRUE), " (", 
                             round(mean(dem_recentvisit, na.rm = TRUE)*100, 2), "%)"),
         desc_mental = paste0(round(mean(health_mental, na.rm = TRUE), 2), " (", 
                              round(sd(health_mental, na.rm = TRUE), 2), ")"),
         desc_overall = paste0(round(mean(health_overall, na.rm = TRUE), 2), " (", 
                               round(sd(health_overall, na.rm = TRUE), 2), ")"),
         desc_trust = paste0(round(mean(dem_trustdoc, na.rm = TRUE), 2), " (", 
                             round(sd(dem_trustdoc, na.rm = TRUE), 2), ")")) %>%
  ungroup() %>% 
  dplyr::select(starts_with("desc_")) %>%
  distinct(.keep_all = TRUE) %>%
  t()

colnames(desc_table) <- c("Simulated White \n Male Physician", 
                          "Simulated White \n Female Physician", 
                          "Simulated Black \n Female Physician", 
                          "Simulated Black \n Male Physician")

rownames(desc_table)[1:16] <- c("Median Age (Min, Max)", "Female", 
                                "College Educated", "HHI Below Median",
                                "White (Non-Hispanic)", "Black", 
                                "Hispanic", "Other", "Medicaid", 
                                "Medicare", "Uninsured", 
                                "Unpaid Medical Bills", 
                                "1+ ED visit last 6 MOs.",
                                "Mental Health Avg. (SD)", 
                                "Overall Health Avg. (SD)",
                                "Trust in Docs Avg. (SD)")

kable(desc_table, "latex", booktabs = TRUE,
      caption = "\\label{tab:balance_mturk}Background Characteristics by Treatment Group in Study 1") %>%
  kable_styling(full_width = TRUE, latex_options ="HOLD_position") %>%
  column_spec(1, width = "5cm") %>%
  add_header_above(c(" " = 1, "Treatment Group" = 4)) %>%
  group_rows("Race/Ethnicity: ", 5, 8) %>%
  group_rows("Insurance status:", 9, 11) %>%
  group_rows("Healthcare experience:", 12, 13) %>%
  group_rows("Subjective assessments:", 14, 16) %>%
  footnote(general = c("Summary statistics for analysis sample."))


#####-------------------------eTable 2------------------------------------------
desc_table <- 
  lucid_desc %>% 
  group_by(Z) %>%
  mutate(desc_age = paste0(median(dem_age, na.rm = TRUE), " (", 
                           min(dem_age, na.rm = T), ",", 
                           max(dem_age, na.rm = T), ")"),
         desc_female = paste0(sum(dem_female, na.rm = TRUE), " (", 
                              round(mean(dem_female, na.rm = TRUE)*100, 2), "%)"),
         desc_college = paste0(sum(dem_college, na.rm = TRUE), " (", 
                               round(mean(dem_college, na.rm = TRUE)*100, 2), "%)"),
         desc_income = paste0(sum(dem_lowinc, na.rm = TRUE), " (", 
                              round(mean(dem_lowinc, na.rm = TRUE)*100, 2), "%)"),
         desc_white = paste0(sum(dem_white, na.rm = TRUE), " (", 
                             round(mean(dem_white, na.rm = TRUE)*100, 2), "%)"),
         desc_black = paste0(sum(dem_black, na.rm = TRUE), " (", 
                             round(mean(dem_black, na.rm = TRUE)*100, 2), "%)"),
         desc_hispanic = paste0(sum(dem_hispanic, na.rm = TRUE), " (", 
                                round(mean(dem_hispanic, na.rm = TRUE)*100, 2), "%)"),
         desc_other = paste0(sum(dem_other, na.rm = TRUE), " (", 
                             round(mean(dem_other, na.rm = TRUE)*100, 2), "%)"),
         desc_medicare = paste0(sum(dem_medicare, na.rm = TRUE), " (", 
                                round(mean(dem_medicare, na.rm = TRUE)*100, 2), "%)"),
         desc_medicaid = paste0(sum(dem_medicaid, na.rm = TRUE), " (", 
                                round(mean(dem_medicaid, na.rm = TRUE)*100, 2), "%)"),
         desc_uninsured = paste0(sum(dem_uninsure, na.rm = TRUE), " (", 
                                 round(mean(dem_uninsure, na.rm = TRUE)*100, 2), "%)"),
         desc_unpaid = paste0(sum(insure_unpaid, na.rm = TRUE), " (", 
                              round(mean(insure_unpaid, na.rm = TRUE)*100, 2), "%)"),
         desc_visit = paste0(sum(dem_recentvisit, na.rm = TRUE), " (", 
                             round(mean(dem_recentvisit, na.rm = TRUE)*100, 2), "%)"),
         desc_mental = paste0(round(mean(health_mental, na.rm = TRUE), 2), " (", 
                              round(sd(health_mental, na.rm = TRUE), 2), ")"),
         desc_overall = paste0(round(mean(health_overall, na.rm = TRUE), 2), " (", 
                               round(sd(health_overall, na.rm = TRUE), 2), ")"),
         desc_trust = paste0(round(mean(dem_trustdoc, na.rm = TRUE), 2), " (", 
                             round(sd(dem_trustdoc, na.rm = TRUE), 2), ")")) %>%
  ungroup() %>% 
  dplyr::select(starts_with("desc_")) %>%
  distinct(.keep_all = TRUE) %>%
  t()

colnames(desc_table) <- c("Simulated White \n Male Physician", 
                          "Simulated White \n Female Physician", 
                          "Simulated Black \n Female Physician", 
                          "Simulated Black \n Male Physician")

rownames(desc_table)[1:16] <- c("Median Age (Min, Max)", "Female", 
                                "College Educated", "HHI Below Median",
                                "White (Non-Hispanic)", "Black", 
                                "Hispanic", "Other", "Medicaid", 
                                "Medicare", "Uninsured", 
                                "Unpaid Medical Bills", 
                                "1+ ED visit last 6 MOs.",
                                "Mental Health Avg. (SD)", 
                                "Overall Health Avg. (SD)",
                                "Trust in Docs Avg. (SD)")

kable(desc_table, "latex", booktabs = TRUE,
      caption = "\\label{tab:balance_lucid}Background Characteristics by Treatment Group in Study 2") %>%
  kable_styling(full_width = TRUE, latex_options ="HOLD_position") %>%
  column_spec(1, width = "5cm") %>%
  add_header_above(c(" " = 1, "Treatment Group" = 4)) %>%
  group_rows("Race/Ethnicity: ", 5, 8) %>%
  group_rows("Insurance status:", 9, 11) %>%
  group_rows("Healthcare experience:", 12, 13) %>%
  group_rows("Subjective assessments:", 14, 16) %>%
  footnote(general = c("Summary statistics for analysis sample."))

#####-------------------------eTable 3------------------------------------------

# Randomization inference. 
balance_fun <- function(data){
  fit <- lm(as.integer(Z) ~., data = data)
  summary(fit)$f[1]
}

# Study 1
ri_mturk_df <- 
  mturk_df %>%
  filter(R == 1) %>%
  dplyr::select(dem_trustdoc, dem_race4, dem_age, dem_female,
                dem_college, health_pastvisit, health_mental, 
                health_overall, insure_cover, insure_unpaid, 
                Z) 

design_study1 <- 
  declare_ra(N = nrow(ri_mturk_df), num_arms = 4, simple = TRUE, 
             conditions = c("WM", "BM", "BF", "WF"))

# Study 2
ri_lucid_df <- 
  lucid_df %>%
  filter(R == 1) %>%
  dplyr::select(dem_trustdoc, dem_race4, dem_age, dem_female,
                dem_college, health_pastvisit, health_mental, 
                health_overall, insure_cover, insure_unpaid, 
                Z)

design_study2 <- 
  declare_ra(N = nrow(ri_lucid_df), num_arms = 4, simple = TRUE, 
             conditions = c("WM", "BM", "BF", "WF"))


# Note: run this line before set.seed() if using R 3.6.0 or later for backward 
# compatibility!
RNGkind(sample.kind = "Rounding") 
set.seed(123)
ri_study1 <- 
  conduct_ri(
    test_function = balance_fun,
    declaration = design_study1, 
    assignment = "Z", 
    sharp_hypothesis = 0,
    data = ri_mturk_df, 
    sims = 10000,
    progress_bar = TRUE
  )

ri_study2 <- 
  conduct_ri(
    test_function = balance_fun,
    declaration = design_study2, 
    assignment = "Z", 
    sharp_hypothesis = 0,
    data = ri_lucid_df, 
    sims = 10000,
    progress_bar = TRUE
  )


null_dist_df <- 
  bind_rows(ri_study1$sims_df %>% mutate(S = "Study 1"),  
            ri_study2$sims_df %>% mutate(S = "Study 2"))


ri_table <- 
  null_dist_df %>%
  group_by(S) %>%
  summarise(obs = mean(est_obs), 
            ci_lower = quantile(est_sim, probs = 0.025),
            ci_upper = quantile(est_sim, probs = 0.975),
            ri_pvalue = mean(est_sim >= est_obs)
  ) 

colnames(ri_table) <- c("Study", "Observed F-Statistic", 
                        "0.025th Quantile", "0.975th Quantile", "RI P-value")
kable(ri_table, "latex", booktabs = TRUE, digits = 2,
      caption = "\\label{tab:ri}Randomization Inference (RI) for covariate balance") %>%
  kable_styling(full_width = TRUE, latex_options = "HOLD_position") %>% 
  column_spec(1, width = "3cm") %>% 
  footnote(general = c("Quantiles of null distribution and RI P-values from 10,000 permutations of the experimental design."))  

#####-------------------------eTable 4-----------------------------------------
combine_means <- 
  est_df %>% 
  filter(R == 1) %>% 
  group_by(Z) %>% 
  summarise(est_mean = mean(primary_dvs_index),
            est_se = commarobust::se_mean(primary_dvs_index)) %>% 
  mutate(study = "Combined (N = 3215)")

mturk_means <- 
  est_df %>% 
  filter(R == 1, S == "MTurk") %>% 
  group_by(Z) %>% 
  summarise(est_mean = mean(primary_dvs_index),
            est_se = commarobust::se_mean(primary_dvs_index)) %>% 
  mutate(study = "Study 1 (N = 1619)")

lucid_means <- 
  est_df %>% 
  filter(R == 1, S == "Lucid") %>% 
  group_by(Z) %>% 
  summarise(est_mean = mean(primary_dvs_index),
            est_se = commarobust::se_mean(primary_dvs_index)) %>% 
  mutate(study = "Study 2 (N = 1596)")

means_table <- 
  bind_rows(combine_means, mturk_means, lucid_means) %>%
  mutate(Z = factor(Z, levels = c("WM", "BM", "BF", "WF"), 
                    labels = c("Simulated White \n Male Physician", 
                               "Simulated Black \n Male Physician", 
                               "Simulated Black \n Female Physician",
                               "Simulated White \n Female Physician"))) %>% 
  group_by(study) %>% 
  mutate(entry = table_entry(est = est_mean, se = est_se, level = 1.96,
                             digits = 2, ci = TRUE)) %>% 
  dplyr::select(Z, entry, study) %>%
  spread(study, entry) 

colnames(means_table)[1] <- ""
kable(means_table, "latex", booktabs = TRUE,
      caption = "\\label{tab:means_table}Average Patient Evalution Scores on Composite Index of Primary Outcomes") %>%
  kable_styling(full_width = TRUE, latex_options = "HOLD_position") %>% 
  footnote(general = c("Means and 95% CIs displayed for each group."))

#####-------------------------eTable 5------------------------------------------

## Now for all primary DVs and composite index.
est_df <- 
  bind_rows(mturk_df[, c("Z", "S", "R",  primary_dvs, "primary_dvs_scale", "S", 
                         covs)],
            lucid_df[, c("Z", "S", "R", primary_dvs, "primary_dvs_scale", "S", 
                         covs)]) %>%
  filter(R == 1) %>% 
  group_by(S) %>%
  mutate(primary_dvs_index = scales::rescale(primary_dvs_scale, to = c(0, 100))) %>% 
  mutate_at(.vars = c(primary_dvs, "primary_dvs_index"), 
            .funs = list( ~ glass_delta(., Z = Z, name = "WM"))) 


est_lin <- list()

loop_vars <- c(primary_dvs, "primary_dvs_index")

for(j in 1:length(loop_vars)){
  est_lin[[j]] <- 
    lm_lin(as.formula(paste(loop_vars[j], "~ Z")), 
           covariates = as.formula(paste("~", paste(c("S", covs), sep = " ",
                                                    collapse = "+"))), 
           data = est_df) %>%
    tidy() %>%
    mutate_if(is.numeric, round, 2) %>%
    data.frame()
}

est_lin_df <- 
  bind_rows(est_lin)

# dataframe for plot/table of covariate-adjusted treatment effects
plot_df <- 
  est_lin_df %>%
  mutate(outcome = factor(outcome, 
                          levels = c("primary_dvs_index","y_confdoc",
                                     "y_satis",
                                     "y_rcmnd", "y_whichd", "y_askmor"),
                          labels = c("Composite Index",
                                     "Patient Confidence", 
                                     "Patient Satisfaction", 
                                     "Likelihood to Recommend",
                                     "Believes Symptom Checker \n (reverse coded)",
                                     "Requests More Tests \n (reverse coded)"))) %>%
  filter(term %in% c("ZBM", "ZBF", "ZWF")) %>%
  mutate(li90 = estimate - 1.645*std.error,
         ui90 = estimate + 1.645*std.error,
         li95 = estimate - 1.96*std.error,
         ui95 = estimate + 1.96*std.error,
         term = ifelse(term == "ZBM", "Simulated \n Black Male \n Physician", term),
         term = ifelse(term == "ZBF", "Simulated \n Black Female \n Physician", term),
         term = ifelse(term == "ZWF", "Simulated \n White Female \n Physician", term),
         term = factor(term, levels =  rev(c("Simulated \n Black Male \n Physician",
                                             "Simulated \n Black Female \n Physician", 
                                             "Simulated \n White Female \n Physician"))))

combine_table <- 
  plot_df %>%
  mutate(p.value = p.adjust(p.value, method = "BH"),
         entry = make_entry(est = estimate, se = std.error, p = p.value,
                            digits = 2, ci = TRUE)) %>% 
  dplyr::select(term, entry, outcome) %>%
  spread(term, entry)

colnames(combine_table)[1] <- " "
kable(combine_table, "latex", booktabs = TRUE,
      caption = "\\label{tab:primary_combined}Estimated Treatment Effects on Primary Outcomes in Combined Sample") %>%
  kable_styling(full_width = TRUE, latex_options = "HOLD_position") %>% 
  column_spec(1, width = "7cm") %>% 
  add_header_above(c(" " = 1, "Treatment" = 3)) %>%
  footnote(general = c("Point estimates with 95% CIs. Estimates are presented as standardized effect sizes using Glass's Delta.", "* denotes significance at 0.05 after Benjamini & Hochberg (1995) adjustment for multiple comparisons."))

#####---------------------Manuscript Fig. 3------------------------------------

# How many of the 18 intervals fall within a .20 MOE?
 with(plot_df,
      sum(li90 > -0.20 & ui90 < 0.20))

plot_df %>%
  ggplot(aes(y = estimate, x = term)) + 
  geom_hline(yintercept = 0, size = 0.5, lty = 1) + 
  geom_hline(yintercept = 0.20, size = 0.5, lty = 3) + 
  geom_hline(yintercept = -0.20, size = 0.5, lty = 3) + 
  geom_linerange(aes(ymin = li90, ymax = ui90), size = 2, color = "black", 
                 alpha = 0.9, position = position_dodge(width = 0.9)) +
  geom_linerange(aes(ymin = li95, ymax = ui95), size = 1, color = "black", 
                 alpha = 0.9, position = position_dodge(width = 0.9)) +  
  geom_point(position = position_dodge(width = 0.9), size = 4, col = "black",
             fill = "black") + 
  scale_shape_manual(values = c(22, 23)) +
  scale_y_continuous(limits = c(-0.3, 0.3)) +
  facet_wrap(~ outcome, scales = "free_x") +
  coord_flip() + 
  theme_kyle(font_size = 20) + 
  theme(legend.position = c(0.725, 0.25),
        legend.direction = "vertical",
        legend.title = element_blank(),
        axis.title.y = element_blank(),
        panel.border = element_rect(fill = NA, colour = "black")) + 
  ylab("Estimated Treatment Effect (in Standard Units)") 

#####-------------------------eTables 6-7---------------------------------------

# Estimate effects for each sample separately and adjust for multiple 
# comparions
mturk_est_lin <- list()
lucid_est_lin <- list()

for(j in 1:length(primary_dvs)){
  mturk_est_lin[[j]] <- 
    lm_lin(as.formula(paste(primary_dvs[j], "~ Z")), 
           covariates = as.formula(paste("~", paste(covs, sep = " ",
                                                    collapse = "+"))), 
           data = est_df %>% filter(S == "MTurk")) %>%
    tidy() %>%
    mutate_if(is.numeric, round, 3) %>%
    data.frame()
  
  lucid_est_lin[[j]] <- 
    lm_lin(as.formula(paste(primary_dvs[j], "~ Z")), 
           covariates = as.formula(paste("~", paste(covs, sep = " ",
                                                    collapse = "+"))), 
           data = est_df %>% filter(S == "Lucid")) %>%
    tidy() %>%
    mutate_if(is.numeric, round, 3) %>%
    data.frame()
}

est_lin_df <- 
  bind_rows(mturk_est_lin, lucid_est_lin) %>% 
  mutate(S = c(rep("MTurk", 260), rep("Lucid", 260)))

primary_table <- 
  bind_rows(est_lin_df) %>%
  filter(term %in% c("ZBM", "ZBF", "ZWF")) %>%
  mutate(study = ifelse(S == "MTurk", "Study 1 (N = 1619)", 
                        "Study 2 (N = 1596)"),
         outcome = factor(outcome, 
                          levels = c("y_confdoc", "y_satis", "y_rcmnd", 
                                     "y_whichd", "y_askmor"),
                          labels = c("Patient Confidence", 
                                     "Patient Satisfaction", 
                                     "Likelihood to Recommend",
                                     "Believes Symptom Checker \n (reverse coded)",
                                     "Requests More Tests \n (reverse coded)")),
         term = factor(term, levels = c("ZBM", "ZBF", "ZWF"), 
                       labels = c("Simulated Black \n Male Physician", 
                                  "Simulated Black \n Female Physician", 
                                  "Simulated White \n Female Physician")))

mturk_table <- 
  primary_table  %>%
  filter(S == "MTurk") %>%
  mutate(p.value = p.adjust(p.value, method = "BH"),
         entry = make_entry(est = estimate, se = std.error, p = p.value,
                            digits = 2, ci = TRUE)) %>% 
  dplyr::select(term, entry, outcome) %>%
  spread(term, entry) 

colnames(mturk_table)[1] <- " "
kable(mturk_table, "latex", booktabs = TRUE,
      caption = "\\label{tab:primary_mturk}Estimated Treatment Effects on Primary Outcomes in Study 1") %>%
  kable_styling(full_width = TRUE, latex_options = "HOLD_position") %>% 
  column_spec(1, width = "7cm") %>% 
  add_header_above(c(" " = 1, "Treatment" = 3)) %>%
  footnote(general = c("Point estimates with 95% CIs. Estimates are presented as standardized effect sizes using Glass's Delta.", "* denotes significance at 0.05 after Benjamini & Hochberg (1995) adjustment for multiple comparisons."))


# Repeat the above for Lucid 
lucid_table <- 
  primary_table  %>%
  filter(S == "Lucid") %>%
  mutate(p.value = p.adjust(p.value, method = "BH"),
         entry = make_entry(est = estimate, se = std.error, p = p.value,
                            digits = 2, ci = TRUE)) %>% 
  dplyr::select(term, entry, outcome) %>%
  spread(term, entry) 

colnames(lucid_table)[1] <- " "
kable(lucid_table, "latex", booktabs = TRUE,
      caption = "\\label{tab:primary_lucid}Estimated Treatment Effects on Primary Outcomes in Study 2") %>%
  kable_styling(full_width = TRUE, latex_options = "HOLD_position") %>% 
  column_spec(1, width = "7cm") %>%
  add_header_above(c(" " = 1, "Treatment" = 3)) %>%
  footnote(general = c("Point estimates with 95% CIs. Estimates are presented as standardized effect sizes using Glass's Delta.", "* denotes significance at 0.05 after Benjamini & Hochberg (1995) adjustment for multiple comparisons."))

#####-------------BART Analysis (Fig. 4 in manuscript & eTable 8)--------------

## NOTE: BART samplers may take a long time to compelete, depending on your 
## machine 

## BART for MTurk. 
study_1 <- 
  mturk_df %>% 
  filter(R == 1) %>%
  mutate(primary_dvs_index = scales::rescale(primary_dvs_scale, to = c(0, 100))) %>% 
  dplyr::select(overt_binary, overt_index, sexism_index, 
                dem_trustdoc, primary_dvs_index,
                dem_age, dem_female, Z, 
                dem_race4, dem_college, dem_female, 
                attention_pass, health_pastvisit, health_mental,
                health_overall, insure_cover, insure_unpaid)

df1 <- study_1 %>% mutate(Z = "WM")
df2 <- study_1 %>% mutate(Z = "WF")
df3 <- study_1 %>% mutate(Z = "BF")
df4 <- study_1 %>% mutate(Z = "BM")

bart_df <- bind_rows(df1, df2, df3, df4)
bart_df$Z <- factor(bart_df$Z, levels = c("WM", "WF", "BF", "BM"))   

# Parameters to control 
bart_control <- dbartsControl(n.burn = 1000L, n.trees = 200L, n.chains = 4L,
                              n.thin = 5)

bart_model_composite <- dbarts(formula = primary_dvs_index ~ Z + overt_binary + 
                                 sexism_index +  as.factor(dem_race4) + 
                                 dem_college + dem_age + dem_female + 
                                 dem_trustdoc + attention_pass + 
                                 health_pastvisit + health_mental + 
                                 health_overall + insure_cover + 
                                 insure_unpaid, test = bart_df, 
                               data = study_1, n.samples = 5000,
                               control = bart_control)

# Run the BART sampler. Note that run accepts numBurnIn and numSamples. The 
# default is 200 burn in, and numSamples is whatever is specificed in dbarts. 
# The default is n.samples = 800. 

# Note: run this line before set.seed() if using R 3.6.0 or later for backward 
# compatibility!
RNGkind(sample.kind = "Rounding") 
set.seed(123)
bart_fit_composite <- bart_model_composite$run()

# Extract estimated potential outcomes for each individual. This creates a 
# matrix of dimension 2000 x n.samples x n.chains. The default behavior here
# is 800 draws from the posterior (n.chains = 800) with 4 chains (n.chains = 4).
# See ?dbartsControl for default specifications. 
bart_test_composite <- plyr::alply(bart_fit_composite$test, 3)
bart_test_composite <- do.call(cbind, bart_test_composite) 

# Diagnostics. Note what we are doing here. Every single individual in the 
# sample has an estimated Y0 and Y1
# mh_draws <- mcmc.list(mcmc(bart_test_composite[1, 1:1000], thin = 5),
#                       mcmc(bart_test_composite[1, 1001:2000], thin = 5),
#                       mcmc(bart_test_composite[1, 2001:3000], thin = 5),
#                       mcmc(bart_test_composite[1, 3001:4000], thin = 5))
# autocorr.plot(mh_draws)
# autocorr.diag(mh_draws)
# traceplot(mh_draws)
# # gelman.diag(mh_draws)

# Extract tau_i's and construct credible intervals for plotting
n <- nrow(study_1)
wf_mat <- (bart_test_composite[(n + 1):(2 * n),] - bart_test_composite[1:n,])
bf_mat <- (bart_test_composite[((2 * n) + 1):(3 * n),] - bart_test_composite[1:n,])
bm_mat <- (bart_test_composite[((3 * n) + 1):(4 * n),] - bart_test_composite[1:n,])

gg_wf <-
  tibble(
    tau_hat = apply(wf_mat, 1, mean),
    ci_upper = apply(wf_mat, 1, quantile, 0.975),
    ci_lower = apply(wf_mat, 1, quantile, 0.025),
    ordering = rank(tau_hat, ties.method = "first"),
    treatment = "White Female"
  )

gg_bf <-
  tibble(
    tau_hat = apply(bf_mat, 1, mean),
    ci_upper = apply(bf_mat, 1, quantile, 0.975),
    ci_lower = apply(bf_mat, 1, quantile, 0.025),
    ordering = rank(tau_hat, ties.method = "first"),
    treatment = "Black Female"
  )

gg_bm <-
  tibble(
    tau_hat = apply(bm_mat, 1, mean),
    ci_upper = apply(bm_mat, 1, quantile, 0.975),
    ci_lower = apply(bm_mat, 1, quantile, 0.025),
    ordering = rank(tau_hat, ties.method = "first"),
    treatment = "Black Male"
  )

gg_df <- bind_rows(gg_wf, gg_bf, gg_bm)
mturk_composite_plot_df <- data.frame(study_1, gg_df)

## BART for Lucid
study_2 <- 
  lucid_df %>% 
  filter(R == 1) %>%
  mutate(primary_dvs_index = scales::rescale(primary_dvs_scale, to = c(0, 100))) %>%
  dplyr::select(overt_binary, asi_hostile, asi_benevolent, dem_trustdoc, 
                primary_dvs_index, dem_age, dem_female, Z, dem_race4, 
                dem_college, dem_female, attention_pass, health_pastvisit, 
                health_mental, health_overall, insure_cover, insure_unpaid)

df1 <- study_2 %>% mutate(Z = "WM")
df2 <- study_2 %>% mutate(Z = "WF")
df3 <- study_2 %>% mutate(Z = "BF")
df4 <- study_2 %>% mutate(Z = "BM")

bart_df <- bind_rows(df1, df2, df3, df4)
bart_df$Z <- factor(bart_df$Z, levels = c("WM", "WF", "BF", "BM"))   

# Parameters to control 
bart_control <- dbartsControl(n.burn = 1000L, n.trees = 200L, n.chains = 4L,
                              n.thin = 5)

bart_model_composite <- dbarts(formula = primary_dvs_index ~ Z + overt_binary + 
                                 asi_benevolent + asi_hostile + 
                                 as.factor(dem_race4) + 
                                 dem_college + dem_age + dem_female + 
                                 dem_trustdoc + attention_pass + 
                                 health_pastvisit + health_mental + 
                                 health_overall + insure_cover + 
                                 insure_unpaid, test = bart_df, 
                               data = study_2, n.samples = 5000,
                               control = bart_control)

# Run the BART sampler. Note that run accepts numBurnIn and numSamples. The 
# default is 200 burn in, and numSamples is whatever is specificed in dbarts. 
# The default is n.samples = 800. 
RNGkind(sample.kind = "Rounding") 
set.seed(123)
bart_fit_composite <- bart_model_composite$run()

# Extract estimated potential outcomes for each individual. This creates a 
# matrix of dimension 2000 x n.samples x n.chains. The default behavior here
# is 800 draws from the posterior (n.chains = 800) with 4 chains (n.chains = 4).
# See ?dbartsControl for default specifications. 
bart_test_composite <- plyr::alply(bart_fit_composite$test, 3)
bart_test_composite <- do.call(cbind, bart_test_composite) 

# Diagnostics. Note what we are doing here. Every single individual in the 
# sample has an estimated Y0 and Y1
# mh_draws <- mcmc.list(mcmc(bart_test_composite[1, 1:1000], thin = 5),
#                       mcmc(bart_test_composite[1, 1001:2000], thin = 5),
#                       mcmc(bart_test_composite[1, 2001:3000], thin = 5),
#                       mcmc(bart_test_composite[1, 3001:4000], thin = 5))
# autocorr.plot(mh_draws)
# autocorr.diag(mh_draws)
# traceplot(mh_draws)
# # gelman.diag(mh_draws)

# Extract tau_i's and construct credible intervals for plotting
n <- nrow(study_2)
wf_mat <- (bart_test_composite[(n + 1):(2 * n),] - bart_test_composite[1:n,])
bf_mat <- (bart_test_composite[((2 * n) + 1):(3 * n),] - bart_test_composite[1:n,])
bm_mat <- (bart_test_composite[((3 * n) + 1):(4 * n),] - bart_test_composite[1:n,])

gg_wf <-
  tibble(
    tau_hat = apply(wf_mat, 1, mean),
    ci_upper = apply(wf_mat, 1, quantile, 0.975),
    ci_lower = apply(wf_mat, 1, quantile, 0.025),
    ordering = rank(tau_hat, ties.method = "first"),
    treatment = "White Female"
  )

gg_bf <-
  tibble(
    tau_hat = apply(bf_mat, 1, mean),
    ci_upper = apply(bf_mat, 1, quantile, 0.975),
    ci_lower = apply(bf_mat, 1, quantile, 0.025),
    ordering = rank(tau_hat, ties.method = "first"),
    treatment = "Black Female"
  )

gg_bm <-
  tibble(
    tau_hat = apply(bm_mat, 1, mean),
    ci_upper = apply(bm_mat, 1, quantile, 0.975),
    ci_lower = apply(bm_mat, 1, quantile, 0.025),
    ordering = rank(tau_hat, ties.method = "first"),
    treatment = "Black Male"
  )

gg_df <- bind_rows(gg_wf, gg_bf, gg_bm)
lucid_composite_plot_df <- data.frame(study_2, gg_df) 


# Stack datasets for plotting/tables
mturk_composite_plot_df$sample <- "Study 1"
lucid_composite_plot_df$sample <- "Study 2"

bart_df <- 
  bind_rows(mturk_composite_plot_df, lucid_composite_plot_df) %>% 
  mutate(treatment = ifelse(treatment == "Black Female", 
                            "Simulated Black \n Female Physician", treatment),
         treatment = ifelse(treatment == "Black Male", 
                            "Simulated Black \n Male Physician", treatment),
         treatment = ifelse(treatment == "White Female", 
                            "Simulated White \n Female Physician", treatment),
         treatment = factor(treatment, 
                            levels =  rev(c("Simulated Black \n Male Physician",
                                            "Simulated Black \n Female Physician",
                                            "Simulated White \n Female Physician"))))

## How many intervals exclude zero? 
bart_df %>%
  filter(sample == "Study 1") %>%
  group_by(treatment, sample) %>%
  summarise(prop_pos = mean(ci_lower > 0 & ci_upper > 0),
            prop_neg = mean(ci_lower < 0 & ci_upper < 0))


## Fig. 4 in manuscript
p1 <- 
  bart_df %>% 
  filter(sample == "Study 1") %>%
  ggplot(., aes(tau_hat, ordering)) +
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), col = "grey",
                 alpha = .5, height = 0) +
  geom_point(alpha = 0.5, size = 0.5, pch = 22, col = "black", fill = "black") +
  geom_vline(xintercept = 0, lty = 2) +
  scale_x_continuous(limits = c(-16, 16)) + 
  ggtitle("Study 1: BART Estimated Treatment Effects") + 
  xlab("") + 
  facet_wrap( ~ treatment) +
  theme_kyle(font_size = 20) + 
  theme(plot.title = element_text(hjust = 0.5)) + 
  theme(legend.position = c(0.725, 0.25),
        legend.direction = "vertical",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5),
        panel.border = element_rect(fill = NA, colour = "black"),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())   


p2 <-
  bart_df %>% 
  filter(sample == "Study 2") %>%
  ggplot(., aes(tau_hat, ordering)) +
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), col = "grey",
                 alpha = .5, height = 0) +
  geom_point(alpha = 0.5, size = 0.5, pch = 22, col = "black", fill = "black") +
  geom_vline(xintercept = 0, lty = 2) +
  scale_x_continuous(limits = c(-16, 16)) + 
  ggtitle("Study 2: BART Estimated Treatment Effects") + 
  xlab("Estimated Treatment Effect on Composite Index (0 to 100)") +
  facet_wrap( ~ treatment) +
  theme_kyle(font_size = 20) + 
  theme(plot.title = element_text(hjust = 0.5)) + 
  theme(legend.position = c(0.725, 0.25),
        legend.direction = "vertical",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5),
        panel.border = element_rect(fill = NA, colour = "black"),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())   


# Combine plots side by side. 
pgrid <- plot_grid(p1, p2, nrow = 2)


## eTable8 in manuscript 
bart_table <- 
  bart_df %>%
  group_by(treatment, sample) %>%
  summarise(prop_pos = mean(tau_hat > 0),
            avg = mean(tau_hat),
            stdev = sd(tau_hat),
            ci_lower = quantile(tau_hat, probs = 0.025),
            ci_upper = quantile(tau_hat, probs = 0.975)) %>% 
  dplyr::select(sample, everything())

colnames(bart_table) <- c("", "Treatment", "Prop. > 0", "Mean", "SD", 
                          "0.025th Quantile", "0.975th Quantile")
kable(bart_table, "latex", booktabs = TRUE, digits = 2,
      caption = "\\label{tab:bart_summary}Summary Statistics for BART Estimated Treatment Effects on Composite Index (0-100)") %>%
  kable_styling(full_width = TRUE, latex_options = "HOLD_position") %>% 
  footnote(general = c("Summary statistics of BART estimated treatment effects for each subject."))  

#####-------------------------eTables 9-1---------------------------------------
mturk_secondary_dvs <- c("y_warm_index", "y_comp_index", "y_faircost")
lucid_secondary_dvs <- c("y_warm_index", "y_comp_index", "y_error_complain",
                         "y_error_sue")

est_df <- 
  bind_rows(mturk_df[, c("Z", "S", "R", mturk_secondary_dvs, "S", covs)],
            lucid_df[, c("Z", "S", "R", lucid_secondary_dvs, "S", covs)]) %>%
  group_by(S) %>%
  mutate_at(.vars = c(union(mturk_secondary_dvs, lucid_secondary_dvs)),
            .funs = list(~glass_delta(., Z = Z, name = "WM")))

mturk_est_lin <- list()
lucid_est_lin <- list()

for(j in 1:length(mturk_secondary_dvs)){
  mturk_est_lin[[j]] <- 
    lm_lin(as.formula(paste(mturk_secondary_dvs[j], "~ Z")), 
           covariates = as.formula(paste("~", paste(covs, sep = " ",
                                                    collapse = "+"))), 
           data = est_df %>% filter(S == "MTurk")) %>%
    tidy() %>%
    mutate_if(is.numeric, round, 3) %>%
    data.frame()
  
}

for(j in 1:length(lucid_secondary_dvs)) {
  lucid_est_lin[[j]] <- 
    lm_lin(as.formula(paste(lucid_secondary_dvs[j], "~ Z")), 
           covariates = as.formula(paste("~", paste(covs, sep = " ",
                                                    collapse = "+"))), 
           data = est_df %>% filter(S == "Lucid")) %>%
    tidy() %>%
    mutate_if(is.numeric, round, 3) %>%
    data.frame()
  
}

est_lin_df <- 
  bind_rows(mturk_est_lin, lucid_est_lin) %>% 
  mutate(S = c(rep("MTurk", 156), rep("Lucid", 208)))

secondary_table <- 
  bind_rows(est_lin_df) %>%
  filter(term %in% c("ZBM", "ZBF", "ZWF")) %>%
  mutate(study = ifelse(S == "MTurk", "Study 1 (N = 1619)", 
                        "Study 2 (N = 1596)"),
         outcome = factor(outcome, levels = c("y_warm_index", "y_comp_index", 
                                              "y_faircost","y_error_complain",
                                              "y_error_sue"),
                          labels = c("Perceived Warmth", 
                                     "Perceived Competence", 
                                     "Fairness of Cost",
                                     "Complain for Error",
                                     "Sue for Error")),
         term = factor(term, levels = c("ZBM", "ZBF", "ZWF"), 
                       labels = c("Simulated Black \n Male Physician", 
                                  "Simulated Black \n Female Physician", 
                                  "Simulated White \n Female Physician"))) %>% 
  mutate(p.value = p.adjust(p.value, method = "BH")) # Adjust P-values

mturk_table <- 
  secondary_table  %>%
  filter(S == "MTurk") %>% 
  mutate(entry = make_entry(est = estimate, se = std.error, p = p.value,
                            digits = 2, ci = TRUE)) %>% 
  dplyr::select(term, entry, outcome) %>%
  spread(term, entry)

colnames(mturk_table)[1] <- " "
kable(mturk_table, "latex", booktabs = TRUE,
      caption = "\\label{tab:secondary_mturk}Estimated Treatment Effects on Secondary Outcomes in Study 1") %>%
  kable_styling(full_width = TRUE, latex_options = "HOLD_position") %>% 
  column_spec(1, width = "7cm") %>%
  add_header_above(c(" " = 1, "Treatment" = 3)) %>%
  footnote(general = c("Point estimates with 95% CIs. Estimates are presented as standardized effect sizes using Glass's Delta.", "* denotes significance at 0.05 after Benjamini & Hochberg (1995) adjustment for multiple comparisons."))


# Repeat for Lucid sample
lucid_table <- 
  secondary_table %>%
  filter(S == "Lucid") %>% 
  mutate(entry = make_entry(est = estimate, se = std.error, p = p.value,
                            digits = 2, ci = TRUE)) %>% 
  dplyr::select(term, entry, outcome) %>%
  spread(term, entry) 

colnames(lucid_table)[1] <- " "
kable(lucid_table, "latex", booktabs = TRUE,
      caption = "\\label{tab:secondary_lucid}Estimated Treatment Effects on Secondary Outcomes in Study 2") %>%
  kable_styling(full_width = TRUE, latex_options = "HOLD_position") %>% 
  column_spec(1, width = "7cm") %>%
  add_header_above(c(" " = 1, "Treatment" = 3)) %>%
  footnote(general = c("Point estimates with 95% CIs. Estimates are presented as standardized effect sizes using Glass's Delta.", "* denotes significance at 0.05 after Benjamini & Hochberg (1995) adjustment for multiple comparisons."))

